requires : spdiv, spinfo (spinfo is the file containing information about all species, spdiv is the collected data.)

require(naniar)
require(ggplot2)
require(grid)
require(VennDiagram)
require(visdat)

Load and merge dataset

Note : this script is based on the Exploratoreis diversity dataset, which is read in with the script analysis_nonpublic.R. It reads in the two datasets spdiv and spinfo.

Dataset Cleaning

From cleaning, getting numbers : - 334700 species in the dataset - 28 different trophic levels - type = presenceabsence : plant pathogen, ant omnivore

# delete Plot_bexis (old plot names)
spdiv[, Plot_bexis := NULL]
# spdiv[, c("Dataversion", "DataID") := NULL]
spdiv <- merge(spdiv, spinfo, by = "Species", all.x=T)
# now I can remove the original datasets, so I save cpu:
rm(spinfo); gc()
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  3360100 179.5    5033091  268.8   5033091  268.8
## Vcells 75180919 573.6  218782701 1669.2 355987468 2716.0

Set to presence-absence

spdiv[value != 0, value := 1]

Select trophic levels

Group to 18 trophic levels as needed for analysis. (see also info_diversity.ods)

# take out trophic levels which are not used in analysis
spdiv <- spdiv[!Trophic_level %in% c("detritivore", "protist.unknown", "soilfungi.other", "vert.herb", "protist.parasite.nonplant")]
# merge the trophic levels which are merged in analysis
spdiv[Trophic_level == "herbivore.snail", Trophic_level := "herbivore"]
spdiv[Trophic_level == "ant.omnivore", Trophic_level := "omnivore"]
spdiv[Trophic_level == "omnivore.snail", Trophic_level := "omnivore"]
spdiv[Trophic_level == "protist.mainly.bacterivore", Trophic_level := "protist.bacterivore"]
# unique(spdiv$Trophic_level)

show years

unique(spdiv[, .(Trophic_level, Year)])
length(unique(spdiv$Species)) # 309734 species in the dataset (incl. OTU)

Get overview of amount of species per trophic level

for(t in unique(spdiv$Trophic_level)){
  print(c(t, length(unique(spdiv[Trophic_level == t, Species]))))
}

Store trophic levels in vector

trlevels <- sort(unique(spdiv$Trophic_level))

Check if diversity across years is correlated

Create new column with trophic level and year

spdiv[, trlevel_year := paste(Trophic_level, Year, sep = "")]

Correlation plot of all trophic levels and years? Or better betadiversity among years?

# m <- cor(spdiv[, .(Species, value, trlevel_year)], use = "pariwise.complete.obs")
# 
# M <- cor(small_grlfuns[, !colnames(grlfuns) %in% c("Explo","Plotn", "Plot"), with=F], use="pairwise.complete.obs")
# corrplot::corrplot(M,type="lower",addCoef.col = "black",method="color",diag=F, tl.srt=1, tl.col="black", mar=c(1,0,1,0), number.cex=0.6)

# testing with 1 trophic level, 2 years
# for presence absence
for(trlevel in c("autotroph", "secondary.consumer", "tertiary.consumer")){
  pdf(paste("corr_diversity_across_years_", trlevel, ".pdf", sep=""), paper = "a4r")
  par(mfrow=c(12,12))
  test <- spdiv[Trophic_level == trlevel, .(Species, value, trlevel_year, Plot, Year)]
  for(p in unique(test$Plot)){
    a <- data.table::dcast.data.table(test[Plot == p], Year ~ Species, fill = 0)
    a[is.na(a)] <- 0
    a <- t(a)
    colnames(a) <- a[1,]
    a <- a[-1,]
    a <- apply(a, 2, as.numeric)
    # corrplot::corrplot(cor(a), type="lower", diag = F)
    a[a != 0] <- 1
    if(sum(is.na(cor(a))) >= 1) next
    corrplot::corrplot(cor(a), type="lower", diag = T, method = "square", tl.col = "black", order = "alphabet", tl.cex = 0.7, main = p)
  }
  dev.off()
}
# note : not possible OTU, because it can't be compared across years. The OTU identities always change.
# note : omnivore, herbivores different years are different species --> no overlap

# tertiary consumers : special case. take the 67 species measured in 2008, 2011 and 2012 and take the 11 suppl. species out of years 2009 and 2010.
tert <- spdiv[Trophic_level == "tertiary.consumer", .(Species, value, trlevel_year, Plot, Year)]
# find the 11 species which are only present in 2009 and 2010
unique(tert[Year == "2009", Species])[which(!unique(tert[Year == "2009", Species]) %in% unique(tert[Year == "2008", Species]))]

Combine years

autotrophs

Years 2008 to 2016. Lichens and Bryophytes only 2009 (119 species, 137 plots)

year no. species no. Plots
2008 344 147
2009 463 150
2009 l 119 150
2009 p 344 150
2010 344 150
2011 344 150
2012 344 149
2013 344 149
2014 344 148
2015 344 150
2016 344 150
autotroph <- spdiv[Trophic_level == "autotroph"]

# how many species and how many plots were measured in the different years?
length(unique(autotroph[Year == "2008", Plot])) # Changed year and tested for all years for reporting in the table
## [1] 147
# get vector with lichen species to examine lichens separately
lichenspecies <- setdiff(unique(autotroph[Year == "2009", Species]), unique(autotroph[Year == "2008", Species]))
    # examine lichens and plants separately - same number of plots and expected number of species?
length(unique(autotroph[!Species %in% lichenspecies, Species]))
## [1] 344
length(unique(autotroph[Species %in% lichenspecies, Plot]))
## [1] 150

Plots identities

# are the same plots missing in all years?
testplotpresence <- unique(autotroph[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# no, different plots are missing.
rm(testplotpresence)

If a plant species is present in at least 1 year, it is considered to be present. Underlying assumption : plant species composition is quite stable, therefore it is representative to assess their presence in a large time span.

Test : check how different the species composition is if considering all years.

Assessing presence of plant species in all years. Lichen and Bryophyte species are taken directly from year 2009, as this is the only year.

plants <- autotroph[!Species %in% lichenspecies, ]
lichens <- autotroph[Species %in% lichenspecies,]
# sum(length(unique(plants$Species)), length(unique(lichens$Species))) # check if the sum of species is 463 to be sure to have all autotroph species

# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
plants <- aggregate(value ~ Species + Plot, plants, function(x) mean(x, na.rm=T))
plants <- data.table::data.table(plants)
# nrow(plants) == 344*150 # TRUE : there are 344 species per plot in the new dataset
# Check how many plant species have been present / absent in how many years
hist(plants$value, main="Presence of plant species among years", sub="The plot indicates if a plant species was either present in no year, \n in 1 years , ... , in all 9 years. Most species are never present", xlab = "")

# set the numbers which are not 0 or 1 to 1 (if value > 0, that means that plant has been present at least in one year)
plants[value != 0, value := 1]

# add the lichens dataset to the plants dataset
autotroph <- rbind(plants, lichens[, .(Species, Plot, value)])
nrow(autotroph) == 150 * 344 + 150 * 119 # TRUE : there are 119 lichen and 344 plant species in all 150 plots.
## [1] TRUE
hist(autotroph$value)

hist(lichens$value)

hist(plants$value)

Although working with a range of 9 years, still many plant species are not present in many plots. This indicates that the assumption of stable autotroph species was correct.

# remove lichens and plants, not used any more
rm(plants) ; rm(lichens); rm(lichenspecies)
# take out autotrophs from spdiv
spdiv <- spdiv[!Trophic_level == "autotroph"]

herbivore

herbivore <- spdiv[Trophic_level == "herbivore"]

Note : the code below can be deleted as soon as the new diversity dataset is available.

# Arthropod herbivores consist of 3 Groups, 2 of them contain missing data, one not.
# We can only take the plots present in all groups --> reduce the number of plots
unique(herbivore[Year == "2008", Group_fine])
## [1] "Hemiptera"   "Coleoptera"  "Hymenoptera" "Orthoptera"
length(unique(herbivore[Year == "2008" & Group_fine == "Hymenoptera", Plot]))
## [1] 150
Dataset Hemiptera Coleoptera Hymenoptera Orthoptera
no Plots 139 139 150 139
herbivore_plotset <- unique(herbivore[Year == "2008" & Group_fine == "Orthoptera", Plot])
# check if the plot set is consistent
sum(herbivore_plotset != unique(herbivore[Year == "2008" & Group_fine == "Hemiptera", Plot])) # all the same
## [1] 0
sum(herbivore_plotset != unique(herbivore[Year == "2008" & Group_fine == "Coleoptera", Plot])) # all the same
## [1] 0
# select the plot set for herbivores
# select all rows which are EITHER from 2008 (arthropods) and from the selected plot list, OR they are from 2017
# species from 2017 don't have to be part of the selected plots
herbivore <- herbivore[Plot %in% herbivore_plotset & Year == "2008" | Year == "2017", ]
rm(herbivore_plotset)
testplotpresence <- unique(herbivore[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Snails were only measured in 134 plots. They contain 39 species. Insects were measured in 139 plots, they contain 401 species.

length(unique(herbivore[Year == "2008", Species]))
length(intersect(herbivore[Year == "2008", Plot], herbivore[Year == "2017", Plot]))
Year | 2008 | 2017 | both | Group | insects | snails | both | ———- | ——– | ——- | —— | no Plots | 139 | 134 | 124 | no Species | 401 | 39 | 440 |

Too many missing plots in snails dataset. Take insects dataset only.

herbivore <- herbivore[Year == "2008", .(Species, Plot, value)]
nrow(herbivore) == 139 * 401 # 401 species in 139 plots present
## [1] TRUE
# remove from spdiv
spdiv <- spdiv[!Trophic_level == "herbivore"]

Omnivore

Consists of arthropods dataset from 2008, ants dataset from 2014_2015, and snails dataset from 2017.

omnivore <- spdiv[Trophic_level == "omnivore"]
testplotpresence <- unique(omnivore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Many plots are missing.

subset arthropods ants snails all arth&sn
Year 2008 14 15 2017 all 08 & 17
no Plots 139 110 134 97 124
no Species 14 20 21 55 35
# length(intersect(omnivore[Year == "2008", Plot], omnivore[Year == "2017", Plot]))
# length(intersect(intersect(omnivore[Year == "2008", Plot], omnivore[Year == "2017", Plot]), omnivore[Year == "2014_2015", Plot]))

Only took the arthropod dataset, as snails and ants have too many missing plots.

omnivore <- omnivore[Year == "2008", .(Species, Plot, value)]
# omnivore_ant <- omnivore[Year == "2014_2015", .(Species, Plot, value)]
# omnivore_snail <- omnivore[Year == "2017", .(Species, Plot, value)]
nrow(omnivore) == 139 * 14 # expected number of rows present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "omnivore"]
#TODO : many plots do not contain any species - take out?
test <- aggregate(value ~ Plot, omnivore, sum)
barplot(height = test$value)
abline(h = mean(test$value), col = "red")
abline(h = median(test$value), col = "green")

print(test$Plot[which(test$value == 0)])
##   [1] "AEG03" "AEG04" "AEG07" "AEG08" "AEG09" "AEG11" "AEG12" "AEG13"
##   [9] "AEG18" "AEG20" "AEG21" "AEG22" "AEG25" "AEG26" "AEG27" "AEG28"
##  [17] "AEG30" "AEG31" "AEG32" "AEG33" "AEG34" "AEG35" "AEG36" "AEG38"
##  [25] "AEG39" "AEG40" "AEG44" "AEG47" "AEG48" "AEG49" "HEG01" "HEG02"
##  [33] "HEG03" "HEG04" "HEG05" "HEG06" "HEG07" "HEG08" "HEG10" "HEG11"
##  [41] "HEG12" "HEG14" "HEG15" "HEG16" "HEG17" "HEG19" "HEG21" "HEG23"
##  [49] "HEG24" "HEG26" "HEG27" "HEG33" "HEG36" "HEG38" "HEG39" "HEG40"
##  [57] "HEG43" "HEG44" "HEG45" "HEG48" "HEG49" "HEG50" "SEG01" "SEG02"
##  [65] "SEG03" "SEG04" "SEG05" "SEG06" "SEG07" "SEG08" "SEG09" "SEG10"
##  [73] "SEG11" "SEG12" "SEG13" "SEG14" "SEG15" "SEG17" "SEG18" "SEG19"
##  [81] "SEG20" "SEG21" "SEG22" "SEG23" "SEG24" "SEG25" "SEG26" "SEG27"
##  [89] "SEG31" "SEG32" "SEG33" "SEG34" "SEG35" "SEG36" "SEG37" "SEG38"
##  [97] "SEG39" "SEG41" "SEG42" "SEG43" "SEG44" "SEG47" "SEG48" "SEG49"
## [105] "SEG50"
#TODO : take out for the moment
trlevels <- trlevels[-which(trlevels == "omnivore")]
rm(omnivore)

Secondary consumers

2008 dataset from

secondary.consumer <- spdiv[Trophic_level == "secondary.consumer"]
testplotpresence <- unique(secondary.consumer[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Only 2008 dataset has some missing values

Year 2008 2011
no Plots 139 150*
no Species 170 4

Species set does not overlap.

  • Note : possibly, set from 2011 was not complete neither? But if the same error as in all insect datasets –> selecting plots from 2008 will solve the issue. In case 2011 Data is used, check this!
# length(unique(secondary.consumer[Year == "2008", Species]))
any(unique(secondary.consumer[Year == "2008", Species]) %in% unique(secondary.consumer[Year == "2011", Species]))

Select the Plot set from 2008 and merge the datasets together.

secondary.consumer <- secondary.consumer[Plot %in% unique(unique(secondary.consumer[Year == "2008", Plot])), .(Species, Plot, value)]
nrow(secondary.consumer) == 4 * 139 + 170 * 139 # expected number of rows present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "secondary.consumer"]

Tertiary consumer

tertiary.consumer <- spdiv[Trophic_level == "tertiary.consumer"]
testplotpresence <- unique(tertiary.consumer[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)
Year 2008 2009 2010 2011 2012
subset birds birds bats birds bats birds birds
no Plot 150 150+149 150+150 150 150
no Species 67 78= 67+11 78= 67+11 67 67
# unique(tertiary.consumer[Year == "2012", Group_fine])
# length(unique(tertiary.consumer[Year == "2012", Plot]))

# length(intersect(unique(tertiary.consumer[Year == "2009", Species]), unique(tertiary.consumer[Year == "2010", Species])))
batspecies <- setdiff(unique(tertiary.consumer[Year == "2009", Species]), unique(tertiary.consumer[Year == "2011", Species]))

The Bird and Bat species of the different years are the same.

Take average of Species occurrence and check how often species occur. We assume that birds and bats are difficult to measure, therefore having a large time window makes sense. A species is considered to be present in a plot, if it is present at least in one year.

birds <- aggregate(value ~ Species + Plot, tertiary.consumer[Group_fine == "Birds"], function(x) mean(x, na.rm=T))
birds <- data.table::data.table(birds)
# nrow(birds) == 67 * 150 # TRUE : 67 species in 150 plots
hist(birds$value, main="Presence of bird species among years")

# If species is present at least in one year, take it as present
birds[value > 0, value := 1]
nrow(birds) == 67 * 150 # expected number of rows present (67 species in 150 plots)
## [1] TRUE
bats <- aggregate(value ~ Species + Plot, tertiary.consumer[Group_fine == "Bats" & Species %in% batspecies], function(x) mean(x, na.rm=T))
bats <- data.table::data.table(bats)
# nrow(bats) == 11 * 150
hist(bats$value)

# If species is present at least in one year, take it as present
bats[value > 0, value := 1]
nrow(bats) == 11 * 150 # expected number of rows present (11 species in 150 plots)
## [1] TRUE
# combine the bird and bat dataset
tertiary.consumer <- rbind(birds, bats)

nrow(tertiary.consumer) == 150 * (11 + 67)
## [1] TRUE
rm(bats); rm(birds); rm(batspecies)
spdiv <- spdiv[!Trophic_level == "tertiary.consumer"]

Bacteria RNA

bacteria.RNA <- spdiv[Trophic_level == "bacteria.RNA"]
testplotpresence <- unique(bacteria.RNA[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~ Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Not visible in the plot, but there are only 148 plots for both years.

Check if OTU names are the same in both years. No obvious overlap.

length(setdiff(unique(bacteria.RNA[Year == "2014", Species]), unique(bacteria.RNA[Year == "2011", Species])))
# length(unique(bacteria.RNA$Species))
# The OTU names prefix is either bac11 or bac14 ...
# grep("01ece679a6294aadf97b931ce39a539a", bacteria.RNA$Species)
bacteria.RNA[grep("01ece679a6294aadf97b931ce39a539a", bacteria.RNA$Species), Year]

The OTUs can be compared across the years. However, we will only work with the 2011 dataset in order to be more consistent with the years. Replace the prefix by bac_ instead of bac11_ and bac14_

# In case the years would be combined
bacteria.RNA[grep("bac11_", Species), Species := gsub("bac11_", "bac14_", Species)]
year 2011 2014
no Plots 148 148
no OTUs 140510 139691
length(unique(bacteria.RNA[Year == "2014", Species])) # 139691
length(setdiff(unique(bacteria.RNA[Year == "2011", Species]), unique(bacteria.RNA[Year == "2014", Species]))) # 34435 # 35254
length(intersect(unique(bacteria.RNA[Year == "2011", Species]), unique(bacteria.RNA[Year == "2014", Species]))) # 105256

2014 : 34435 OTUs unique to 2014, 105256 overlapping of total 139691. ~ 1/4 to 3/4 2011 : 35254 OTUs unique to 2011, 105256 overlapping of total 140510. ~ 1/4 to 3/4

grid::grid.newpage()
VennDiagram::draw.pairwise.venn(area1 = 34435 + 105256, area2 = 35254 + 105256, cross.area = 105256, category = c("2011 OTU", "2014 OTU"))

## (polygon[GRID.polygon.2421], polygon[GRID.polygon.2422], polygon[GRID.polygon.2423], polygon[GRID.polygon.2424], text[GRID.text.2425], text[GRID.text.2426], text[GRID.text.2427], text[GRID.text.2428], text[GRID.text.2429])

Select 2011 dataset.

bacteria.RNA <- bacteria.RNA[Year == "2011", .(Species, Plot, value)]
# The bacteria RNA dataset had been reduced: all species-plot combinations with 0 were removed.
# reconstruct them (code from Caterina)
bacteria.RNA <- data.table::setDT(bacteria.RNA)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
bacteria.RNA[is.na(value), value := 0 ]
# # add NA values for the two missing plots
# bac_missing <- rbind(data.table::data.table(Species = unique(bacteria.RNA[Plot == "AEG21", Species]), Plot = "AEG33", value = NA),
#                      data.table::data.table(Species = unique(bacteria.RNA[Plot == "AEG21", Species]), Plot = "AEG34", value = NA))
nrow(bacteria.RNA) == 148 * 140510 # expected number of plots present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "bacteria.RNA"]

protist.bacterivore

2011, 2011_2012, 2017 datasets

protist.bacterivore <- spdiv[Trophic_level == "protist.bacterivore"]
testplotpresence <- unique(protist.bacterivore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Only 1 plot missing, in 2011

Are the OTU the same across years? Datasets 2011 and 2017 are comparable with each others, but not with dataset 2011_2012. The name appendices for the latter need to be preserved, the other prefixes can be removed.

protist.bacterivore[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # remove appendix from 2011
protist.bacterivore[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # remove appendix from 2017

length(setdiff(unique(protist.bacterivore[Year == "2011", Species]), unique(protist.bacterivore[Year == "2011_2012", Species])))
length(intersect(unique(protist.bacterivore[Year == "2017", Species]), unique(protist.bacterivore[Year == "2011_2012", Species])))
# length(unique(protist.bacterivore[Year == "2011", Species]))
# length(unique(protist.bacterivore[Year == "2011_2012", Species]))
year 2011 2011_2012 2017 2011 2017 2017 2011_2012
no Plots 149 150 150 149 149
no OTUs 856 233 863 833+23+30 0

No overlap between 2011 and 2011_2012 and 2017 and 2011_2012.

For BetaDivMultifun, only year 2011 is chosen :

protist.bacterivore <- protist.bacterivore[Year == "2011", .(Species, Plot, value)]
nrow(protist.bacterivore) == 149 * 856 # TRUE : The expected number of rows is present.
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.bacterivore"]

protist.eukaryvore

protist.eukaryvore <- spdiv[Trophic_level == "protist.eukaryvore"]
testplotpresence <- unique(protist.eukaryvore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Only 1 plot missing, in 2011.

# code for OTU renaming, in case they would like to be combined.
protist.eukaryvore[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # clean 2011
protist.eukaryvore[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # clean 2017
# interchange intersect with setdiff and modify years to obtain values in table below
length(setdiff(unique(protist.eukaryvore[Year == "2017", Species]), unique(protist.eukaryvore[Year == "2011", Species])))
length(unique(protist.eukaryvore[Year == "2011", Plot]))
Year 2011 2017 both
no Plots 149 150 149
no OTUs 211 210 204+7+6

For BetaDivMultifun, only year 2011 is chosen :

protist.eukaryvore <- protist.eukaryvore[Year == "2011", .(Species, Plot, value)]
nrow(protist.eukaryvore) == 149 * 211 # TRUE : The expected number of rows is present.
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.eukaryvore"]

protist.omnivore

protist.omnivore <- spdiv[Trophic_level == "protist.omnivore"]
testplotpresence <- unique(protist.omnivore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Only 1 plot missing, in 2011.

# code for enaming of OTUs in case years would be combined.
protist.omnivore[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # clean 2011
protist.omnivore[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # clean 2017
# interchange intersect with setdiff and modify years to obtain values in table below
length(intersect(unique(protist.omnivore[Year == "2011", Species]), unique(protist.omnivore[Year == "2017", Species])))
length(unique(protist.omnivore[Year == "2017", Species]))
Year 2011 2017 both
no Plots 149 150 149
no OTUs 440 434 430+10+4

Only Year 2011 is chosen.

protist.omnivore <- protist.omnivore[Year == "2011", .(Species, Plot, value)]
nrow(protist.omnivore) == 149 * 440 # expected number of rows is present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.omnivore"]

protist.plant.parasite

protist.plant.parasite <- spdiv[Trophic_level == "protist.plant.parasite"]
testplotpresence <- unique(protist.plant.parasite[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Only 1 plot missing, in 2011.

# Code for OTU renaming, in case years would be combined.
protist.plant.parasite[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # clean 2011
protist.plant.parasite[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # clean 2017
# interchange intersect with setdiff and modify years to obtain values in table below
length(setdiff(unique(protist.plant.parasite[Year == "2017", Species]), unique(protist.plant.parasite[Year == "2011", Species])))
length(unique(protist.plant.parasite[Year == "2017", Plot]))
Year 2011 2017 both
no Plots 149 150 149
no OTUs 96 95 95+1+0

Only 2011 is chosen

protist.plant.parasite <- protist.plant.parasite[Year == "2011", .(Species, Plot, value)]
nrow(protist.plant.parasite) == 149 * 96
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.plant.parasite"]

soilfungi.decomposer

Years 2011, 2014, 2017

soilfungi.decomposer <- spdiv[Trophic_level == "soilfungi.decomposer"]
testplotpresence <- unique(soilfungi.decomposer[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

Check if OTU need renaming and check overlap of OTUs

length(unique(soilfungi.decomposer[Year == "2017", Species]))
length(setdiff(unique(soilfungi.decomposer[Year == "2017", Species]), unique(soilfungi.decomposer[Year == "2014", Species])))

length(intersect(intersect(unique(soilfungi.decomposer[Year == "2011", Species]), unique(soilfungi.decomposer[Year == "2017", Species])), unique(soilfungi.decomposer[Year == "2014", Species])))
Year 2011 2014 2017 11 14 11 17 14 17 all
no Plots 150 150 150 150 150 150 150
no OTUs 12342 10772 10971 9403+2939+1369 9368+2974+1603 8391+2381+2580 7420+(see left)

Only year 2011 is chosen

soilfungi.decomposer <- soilfungi.decomposer[Year == "2011", .(Species, Plot, value)]
# adding the missing plot - species combinations . all species-plot combinations with 0 had been removed to store memory
# reconstruct them (code from Caterina)
soilfungi.decomposer <- data.table::setDT(soilfungi.decomposer)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
soilfungi.decomposer[is.na(value), value := 0 ]
nrow(soilfungi.decomposer) == 150 * 12342 # TRUE : now, all expected rows are present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "soilfungi.decomposer"]

soilfungi.pathotroph

soilfungi.pathotroph <- spdiv[Trophic_level == "soilfungi.pathotroph"]
testplotpresence <- unique(soilfungi.pathotroph[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

No missing plots

# code to fill table below
length(unique(soilfungi.pathotroph[Year == "2017", Species]))
length(setdiff(unique(soilfungi.pathotroph[Year == "2017", Species]), unique(soilfungi.pathotroph[Year == "2011", Species])))

length(intersect(intersect(unique(soilfungi.pathotroph[Year == "2011", Species]), unique(soilfungi.pathotroph[Year == "2017", Species])), unique(soilfungi.pathotroph[Year == "2014", Species])))
Year 2011 2014 2017 11 14 11 17 14 17 all
no Plots 150 150 150 150 150 150 150
no OTUs 4097 6313 2924 2852+1245+3461 2852+1245+304 2134+4179+790 1934+(see left)

Only 2011 data is used here.

soilfungi.pathotroph <- soilfungi.pathotroph[Year == "2011", .(Species, Plot, value)]
# all plot-species combinations with 0 had been removed to save memory. They are added back now. (code from Caterina)
soilfungi.pathotroph <- data.table::setDT(soilfungi.pathotroph)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
soilfungi.pathotroph[is.na(value), value := 0 ]
nrow(soilfungi.pathotroph) == 150 * 4097 # TRUE : all expected rows present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "soilfungi.pathotroph"]

soilfungi.symbiont

soilfungi.symbiont <- spdiv[Trophic_level == "soilfungi.symbiont"]
testplotpresence <- unique(soilfungi.symbiont[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override

rm(testplotpresence)

No missing plots

length(unique(soilfungi.symbiont[Year == "2017", Species]))

length(setdiff(unique(soilfungi.symbiont[Year == "2017", Species]), unique(soilfungi.symbiont[Year == "2014", Species])))

length(intersect(intersect(unique(soilfungi.symbiont[Year == "2011", Species]), unique(soilfungi.symbiont[Year == "2017", Species])), unique(soilfungi.symbiont[Year == "2014", Species])))
Year 2011 2014 2017 11 14 11 17 14 17 all
no Plots 150 150 150 150 150 150 150
no OTUs 1416 1478 1445 1093+323+385 1057+359+388 1080+398+365 914+(see left)

Only 2011 is chosen.

soilfungi.symbiont <- soilfungi.symbiont[Year == "2011", .(Species, Plot, value)]
# adding back missing plot-species combinations (code from Caterina)
soilfungi.symbiont <- data.table::setDT(soilfungi.symbiont)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
soilfungi.symbiont[is.na(value), value := 0 ]
nrow(soilfungi.symbiont) == 150 * 1416 # all expected combinations present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "soilfungi.symbiont"]

check and select trophic levels with only one year

belowground herbivore

belowground.herbivore <- spdiv[Trophic_level == "belowground.herbivore", .(Species, Plot, value)]
spdiv <- spdiv[!Trophic_level == "belowground.herbivore"]
# testplotpresence <- unique(belowground.herbivore[, .(Year, Plot)])
# vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# rm(testplotpresence)
nrow(belowground.herbivore) == 150 * 11 # expected number of rows present
## [1] TRUE

All plots are present, only one year (simple situation - no plot is shown).

hist(belowground.herbivore$value, main="Presence of belowground herbivores, 11 species", sub="The plot indicates that most species - Plot combinations are not present. This is a first indicator for relatively high (acceptable high) beta-diversity.", xlab = "")
bh_overv <- aggregate(value ~ Plot, belowground.herbivore, sum)
barplot(height = bh_overv$value)
abline(h = mean(bh_overv$value), col = "red")
abline(h = median(bh_overv$value), col = "green")

Most plots contain 3 species. 5 Plots do not contain ANY species! Need to be removed from the dataset.

#TODO : exclude plots with zero species until we find a solution. 
# Problem : betadiversity can't be calculated on plots without species.
zeroplots <- bh_overv$Plot[which(bh_overv$value == 0)]
belowground.herbivore <- belowground.herbivore[!Plot %in% zeroplots]
rm(zeroplots) ; rm(bh_overv)

Calculate betadiversity to check wether plots differ in their species composition / turnover.

beta.belowground.herbivore <- prepare_for_betapair(belowground.herbivore)
beta.belowground.herbivore <- betapart::beta.pair(beta.belowground.herbivore, index.family = "sorensen")

corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sor),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sim),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sne),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

There is definitely some betadiversity among the plots.

nrow(belowground.herbivore) == 145 * 11 # expected number of plots present
## [1] TRUE
# cleaning
rm(beta.belowground.herbivore)

belowground predator

belowground.predator <- spdiv[Trophic_level == "belowground.predator"]
nrow(belowground.predator) == 17 * 150 # expected number of plots present
## [1] TRUE
bp_overv <- aggregate(value ~ Plot, belowground.predator, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")

There is 1 plot in belowground predators which does not contain any species.

#TODO : find out how to handle plots without species
zeroplots <- bp_overv$Plot[which(bp_overv$value == 0)]
belowground.predator <- belowground.predator[!Plot %in% zeroplots]
rm(zeroplots); rm(bp_overv)
spdiv <- spdiv[!Trophic_level == "belowground.predator"]

calc betadiversity and plot

beta.belowground.predator <- prepare_for_betapair(belowground.predator)
beta.belowground.predator <- betapart::beta.pair(beta.belowground.predator, index.family = "sorensen")

corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sor),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sim),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sne),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)

nrow(belowground.predator) == 149 * 17 # expected number of rows present
## [1] TRUE
# cleaning
rm(beta.belowground.predator)

decomposer

decomposer <- spdiv[Trophic_level == "decomposer", .(Species, Plot, value)]
# testplotpresence <- unique(decomposer[, .(Year, Plot)])
# vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# rm(testplotpresence)
bp_overv <- aggregate(value ~ Plot, decomposer, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")

#TODO HERE! There are so many plots without decomposers - no idea if it still worth it... Include them or not?
length(unique(decomposer$Species)) # even though 40 species, there are so few present.
## [1] 40
# sum(bp_overv$value == 0) # 81 plots have species, 58 don't, total 139.
#TODO : take out for the moment, until decision
trlevels <- trlevels[-which(trlevels == "decomposer")]
spdiv <- spdiv[!Trophic_level == "decomposer"]
rm(decomposer)

plant pathogen

plant.pathogen <- spdiv[Trophic_level == "plant.pathogen"]
spdiv <- spdiv[!Trophic_level == "plant.pathogen"]
bp_overv <- aggregate(value ~ Plot, plant.pathogen, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")

remove missing plots

plant.pathogen <- plant.pathogen[!Plot %in% bp_overv$Plot[which(bp_overv$value == 0)]]
# fill in missing combinations
plant.pathogen <- data.table::setDT(plant.pathogen)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
plant.pathogen[is.na(value), value := 0 ]
nrow(plant.pathogen) == 84 * 148
## [1] TRUE

pollinator

pollinator <- spdiv[Trophic_level == "pollinator"]
spdiv <- spdiv[!Trophic_level == "pollinator"]
bp_overv <- aggregate(value ~ Plot, pollinator, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")

pollinator <- pollinator[!Plot %in% bp_overv$Plot[which(bp_overv$value == 0)]]
# fill in missing combinations
pollinator <- data.table::setDT(pollinator)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
pollinator[is.na(value), value := 0 ]
nrow(pollinator) == 705 * 144
## [1] TRUE
# cleaning
rm(bp_overv)

144 Plots with 705 species.

Find plot Selection

Find subset of plots without missing diversity data and store.

Is there any plot with zero species?

for(t in trlevels){
  print(t)
  tr <- get(t)
  test <- aggregate(value ~ Plot, tr, sum)
  print(test$Plot[which(test$value == 0)])
}

There is no more plot with zero species in the dataset.

Select the plots without missing information and store.

plots <- list()
for(t in trlevels){
  tr <- get(t)
  plots[[t]] <- unique(tr$Plot)
}
common.plots <- Reduce(intersect,plots) #128 plots
saveRDS(common.plots, file = "plotNAset.rds")

redplotset <- c()
for(t in trlevels){
  redplotset[t] <- length(Reduce(intersect, plots[-which(names(plots) == t)]))
}

par(mar = c(10, 4.1, 4.1, 2.1))
barplot(redplotset, ylim = c(0, 140), las = 3, cex.names = 0.8, main = "Does removal of given trophic level increase number of plots?")
abline(h = 128, col = "darkgreen")

Inspect common plot set

paste("AEG :", length(grep("A", common.plots)), "   HEG :", length(grep("H", common.plots)), "    SEG :", length(grep("S", common.plots)), sep = "")
region no. plots
all 128
AEG 40
HEG 43
SEG 45

Select Plots from diversity datasets

Only keep the selected plots

for(t in trlevels){
  tr <- get(t)
  tr <- tr[Plot %in% common.plots, ]
  assign(t, tr)
}

Calculate betadiversities

for(t in trlevels){
  tr <- get(t)
  b.tr <- prepare_for_betapair(tr)
  b.tr <- betapart::beta.pair(b.tr, index.family = "sorensen")
  assign(t, b.tr)
}

Cleaning

rm(spdiv); rm(test); rm(tr); rm(t); rm(redplotset); rm(plots); rm(b.tr)